---
title: "Acquisition of quantifier raising of a universal across an existential: Evidence from German"
author: "Kriszta Szendr&#xF6;i, Rebecca Schumacher, Tom Fritzsche & Barbara H&#xF6;hle"
date: "31/Mar/2017"
output: html_document
---

<!-- comments -->
[//]: Platform independent comment, NOT in html source!
[//]: Before anything run: library(rmarkdown)
[//]: render('Szendroi_etal_2017_analysis.Rmd'); browseURL(sprintf('file://%s/Szendroi_etal_2017_analysis.html', file.path(getwd()))) # create & open html


## Data

```{r, echo=FALSE, results="hide", message=FALSE}

# Clearing the workspace
rm(list=ls())

# Loading packages
# library(rmarkdown)	# for markdown
library(reshape2)	# reshaping, long/wide format
library(ggplot2)	# plotting
library(lme4)		# LME
# library(ez)			# for ez ANOVA
library(RePsychLing)	# for evaluation of model

# Reading the raw data
dat0 <- read.table("Szendroi_etal_2017_raw_data.txt", header=TRUE, sep ="\t", dec=",", na="", fill=TRUE)

```

Version information

```{r, echo=TRUE}

R.Version()$version.string
packageVersion("lme4")

```

### Original data structure

```{r, echo=FALSE}

str(dat0)

```

The responses are coded as *D* (distributional scope) or *O* (overt scope).
Recoding to zeros and ones:
*D* = 1
*O* = 0 (zero)

This means that the dependent variable is the proportion of distributional scope responses (and the missing value to 100% are overt scope responses)

All fillers are correct (=1), they will be removed

```{r, echo=FALSE, results="hide", message=FALSE, warning=FALSE}

# Restructuring the data to long format
dat1 <- melt(dat0, id.vars=c("id","agegroup"), measure.vars=c("Filler1","Filler2","Filler3","Passive1","Passive2","Passive3","Active1","Active2","Active3","Active4","Active5","Active6"), variable="condnum", na.rm=TRUE)
# warning message "attributes are not identical across measure variables" because data is numerical (1) for fillers but categorial (D/O) for test items - this is ok

# New condition column
dat1$cond <- as.factor(ifelse(substr(dat1$condnum, 1, 1)=="F", "Filler",
					   ifelse(substr(dat1$condnum, 1, 1)=="P", "Passive",
					   ifelse(substr(dat1$condnum, 1, 1)=="A", "Active", NA))))

# New position column (extracting number from the original trial)
dat1$pos <- as.numeric(sub("[[:alpha:]]+", "",  dat1$condnum))

# Converting the dependent variable to binomial
dat1$response <- ifelse(dat1$value=="1", 1,
				 ifelse(dat1$value=="D", 1,
				 ifelse(dat1$value=="O", 0, NA)))

# Looking at the means
tapply(dat1$response, list(dat1$cond, dat1$agegroup), mean)

# Removing the fillers and keeping only relevant columns
d <- droplevels(dat1[dat1$cond!="Filler", c("id","agegroup","cond","pos","response") ] )

# Sorting by age group, participant, condition and then trial number
d <- d[order(d$agegroup, d$id, d$cond, d$pos), ]

# Just looking
# str(d)	# View(d)

```

### Restructured data


```{r, echo=FALSE}

str(d)

```

Participants in each group:

```{r, echo=FALSE}

# participants per age group
with(unique(d[ , c("id","agegroup")]), tapply(id, agegroup, length) )

```

## Statistics

### Descriptive statistics

```{r, echo=FALSE}

# time 100 for the percentages
round(tapply(d$response, list(d$cond, d$agegroup), mean), 3)*100

```


```{r, echo=FALSE, results="hide", message=FALSE, warning=FALSE}

# Aggregating

# Single subject averages
sisu <- aggregate(response ~ agegroup + id + cond, data=d, na.action=na.pass, mean, na.rm=FALSE)
sisu <- droplevels(sisu)
str(sisu)	# View(sisu)

# Grand averages
gra <- aggregate(response ~ cond + agegroup, data=sisu, function(x) unlist(c(M = mean(x, na.rm=T), SE = sd(x)/sqrt(length(x-1)), CI = 1.96*(sd(x)/sqrt(length(x-1))), N = length(x) ))) # str(gra)

# Convert complex column to simple columns (function 'aggregate' with multiple functions are only attributes)
# + converting to PERCENT
gra$M <- gra$response[,1]*100
gra$SE <- gra$response[,2]*100
gra$CI <- gra$response[,3]*100
gra$N <- gra$response[,4]
gra <- gra[ , -which(names(gra)=="response")] # remove complex colum
# str(gra)	# View(gra)	# head(gra)

# Writing grand means into different object for plotting
p0 <- gra
# Giving nicer age group level names
# Checking the order first
tapply(p0$M, p0$agegroup, mean)
# Renaming
levels(p0$agegroup) <- c("5-year olds", "6-year olds", "Adults")

```


```{r, echo=FALSE}

# Plotting using package 'ggplot2'
dodge <- .12
p <- ggplot(p0, aes(x=cond, y=M, fill=cond))
# p <- p + ggtitle("Responses")
p <- p + ylab("Percentage of distributive repsonses")
# p <- p + scale_y_continuous(limits=c(0, 11500), breaks=seq(0, 11500, 2000))
p <- p + xlab("Condition")
p <- p + facet_grid(. ~ agegroup)
# bars
p <- p + geom_bar(stat="identity") + guides(fill=FALSE)
# error bars
p <- p + geom_errorbar(aes(ymin=M-SE, ymax=M+SE, width=.4), size=.6, position=position_dodge(width = dodge), linetype="solid", colour=gray(.1))
# gray() = 0 black, 1 white, gray(.9),gray(0.6),gray(0)
p <- p + scale_fill_manual(values=c(gray(.4), gray(.7) ))
# numbers
p <- p + geom_text(aes(y = 25, label = round(p0$M, digits=0)), colour=gray(.1))
# chance line
p <- p + geom_hline(aes(yintercept=c(50)), linetype=2, col=gray(0.3))
p + theme_bw(20)

# Exporting the plot to a PNG file
# ggsave("Szendroi_etal_2017_Fig1.png", plot = last_plot(), device = NULL, path = NULL, scale = 1, width = 20, height = 15, units = "cm", dpi = 300)


```

### Inferential statistics: Generalised linear mixed-effects model

Agegroup is coded such that *6yo* are in the intercept - so that they can be compared to 5yo and adults.

Condition is coded such that *active* (= experimental items) are in the intercept.

The model has three random components: for participants, for items, and an individual adjustment of the condition effect for each participant (= random slope).

The estimates are in log odds (zero = 1:1 = 50%).

```{r, echo=FALSE, results="hide", message=FALSE, warning=FALSE}

# Creating a new colum for item ID (for random effect part)
d$item_id <- as.factor(paste(d$cond, d$pos, sep="") )
# levels(d$item_id)

# Reordering the factor levels
# Checking the means
with(d, tapply(response, agegroup, mean) )
# Changing the order
d$agegroup <- factor(d$agegroup, levels=c("6yo", "5yo", "adults"))
# Checking the means again
with(d, tapply(response, agegroup, mean) )
# levels(d$agegroup)

# What are the contrast specifications?
contrasts(d$cond)		# intercept=active
contrasts(d$agegroup)	# intercept=6yo

# Calculate LME (package 'lme4')
# bobyqa recommended for binomial
m1 <- glmer(response ~ 1 + cond * agegroup + (1 + cond | id) + (1  | item_id), data=d, family=binomial, control=glmerControl(optimizer="bobyqa"))

# Showing model output
summary(m1)

# Looking at the model fit
# summary(rePCA(m1))
# plot(resid(m1))
# plot(m1)


```


```{r, echo=FALSE}

summary(m1)

```

Explanation of the fixed effects:

(Intercept)

* This is the average of the 6yo in the actives (in log odds, the 42% in the plot, 0 would be 50%).
* All other effects are added to this (so are in relation, meaning they are not absolute effects like main effects in ANOVA).

condPassive

* The effect of passives compared to actives for *6yo*, so the increase to 90% (_p_<.001).
* 6yo give more distributive responses in passives than in actives.

agegroup5yo

* The difference between 5yo and 6yo for the actives (_p_=.183).
* The value is positive, which means that 5yo give numerically more distributive responses to actives than 6yo do, but this is *not* significant.

agegroupadults

* The difference between adults and 6yo for the actives (_p_<.01).
* Adults give significantly less distributive responses (2%) to actives than 6yo do (42%).

condPassive:agegroup5yo

* The difference between the passive effect for *5yo* (+17%) and the passive effect for 6yo (+48% increase).
* The size of the passive effect is reduced by 31% in 5yo compared to 6yo (_p_<.01).

condPassive:agegroup6yo

* The difference between the passive effect for *adults* (+91%) compared to 6yo (+48).
* The passive effect is significantly larger (by 43%) in adults compared to the one in 6yo (_p_<.01).
* The effect of passive is significant in 6yo and even larger in adults.


The only comparison that this model does not give is whether the difference between 56% for actives compared to 73% for passives in 5yo is significant.
To test this, the 5yo have to be put in the intercept (see model 2 below).

### Model 2

```{r, echo=FALSE, results="hide", message=FALSE, warning=FALSE}

# Copying data to a new object for changing the base level for age group
d2 <- d

# Checking the means
with(d2, tapply(response, agegroup, mean) )
# Changing the order
d2$agegroup <- factor(d2$agegroup, levels=c("5yo", "6yo","adults")) 
# Checking the means again
with(d2, tapply(response, agegroup, mean) )
# levels(d2$agegroup)

# Calculate 2nd LME (package 'lme4')
# same specification as model 1, just the 5yo are in the intercept
m2 <- glmer(response ~ 1 + cond * agegroup + (1 + cond | id) + (1  | item_id), data=d2, family=binomial, control=glmerControl(optimizer="bobyqa")) 

# Showing model output
summary(m2)

```


```{r, echo=FALSE}

summary(m2)

```

Relevant here is the condPassive effect, which now refers to only the 5yo:

* The (absolute) effect of passives for *5yo* (1.26 = the 17% increase) is *not* significant (_p_=.124). The first model above already showed that the 5yo differ from 6yo for this effect, now this model shows that there is *no* significant effect of passives in 5yo.
* The rest of the model is very similar (regarding the significance level: identical) to the model above, the estimates change a bit because now it is in relation to the 5yo.
But note that the effect agegroup6yo is identical to the effect agegroup5yo above just the sign of the estimate changes because now 6yo are compared to 5yo, before it was the other way around.

```{r, echo=FALSE, results="hide", message=FALSE, warning=FALSE}

# ----- END of SCIPT -----

```

